Re: [PATCH 3/3] Revert "e1000e: Do not read ICR in Other interrupt"
From: Benjamin Poirier
Date: Mon Jan 29 2018 - 02:40:08 EST
On 2018/01/26 13:01, Alexander Duyck wrote:
> On Fri, Jan 26, 2018 at 1:12 AM, Benjamin Poirier <bpoirier@xxxxxxxx> wrote:
> > This reverts commit 16ecba59bc333d6282ee057fb02339f77a880beb.
> >
> > It was reported that emulated e1000e devices in vmware esxi 6.5 Build
> > 7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver
> > overrun interrupt bursts"). Some tracing shows that after
> > e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other()
> > on emulated e1000e devices. In comparison, on real e1000e 82574 hardware,
> > icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation.
> >
> > Some experimentation showed that this flaw in vmware e1000e emulation can
> > be worked around by not setting Other in EIAC. This is how it was before
> > commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt").
> >
> > Since the ICR read in the Other interrupt handler has already been
> > restored, this patch effectively reverts the remainder of commit
> > 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt").
> >
> > Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts")
> > Signed-off-by: Benjamin Poirier <bpoirier@xxxxxxxx>
> > ---
> > drivers/net/ethernet/intel/e1000e/netdev.c | 10 ++++++++--
> > 1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> > index ed103b9a8d3a..fffc1f0e3895 100644
> > --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> > +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> > @@ -1916,6 +1916,13 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
> > struct e1000_hw *hw = &adapter->hw;
> > u32 icr = er32(ICR);
> >
> > + /* Certain events (such as RXO) which trigger Other do not set
> > + * INT_ASSERTED. In that case, read to clear of icr does not take
> > + * place.
> > + */
> > + if (!(icr & E1000_ICR_INT_ASSERTED))
> > + ew32(ICR, E1000_ICR_OTHER);
> > +
>
> This piece doesn't make sense to me. Why are we clearing OTHER if
> ICR_INT_ASSERTED is not set?
Datasheet §10.2.4.1 ("Interrupt Cause Read Register") says that ICR read
to clear only occurs if INT_ASSERTED is set. This corresponds to what I
observed.
However, while working on these issues, I noticed that when there is an rxo
event, INT_ASSERTED is not always set even though the interrupt is raised. I
think this is a hardware flaw.
For example, if doing
ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
we enter e1000_msix_other() and two consecutive reads of ICR result in
0x81000004
0x00000000
If doing
ew32(ICS, E1000_ICS_RXO | E1000_ICS_OTHER);
we enter e1000_msix_other() and two consecutive reads of ICR result in
0x01000041
0x01000041
Consequently, we must clear OTHER manually from ICR, otherwise the
interrupt is immediately re-raised after exiting the handler.
These observations are the same whether the interrupt is triggered via a
write to ICS or in hardware.
Furthermore, I tested that this behavior is the same for other Other
events (MDAC, SRPD, ACK, MNG). Those were tested via a write to ICS
only, not in hardware.
This is a version of the test patch that I used to trigger lsc and rxo in
software and hardware. It applies over this patch series.
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 0641c0098738..f54e7ac9c934 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -398,6 +398,7 @@
#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */
#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO 0x00000040 /* rx overrun */
#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
#define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */
/* If this bit asserted, the driver should claim the interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 003cbd605799..4933c1beac74 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1802,98 +1802,20 @@ static void e1000_diag_test(struct net_device *netdev,
struct ethtool_test *eth_test, u64 *data)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
- u16 autoneg_advertised;
- u8 forced_speed_duplex;
- u8 autoneg;
- bool if_running = netif_running(netdev);
+ struct e1000_hw *hw = &adapter->hw;
pm_runtime_get_sync(netdev->dev.parent);
set_bit(__E1000_TESTING, &adapter->state);
- if (!if_running) {
- /* Get control of and reset hardware */
- if (adapter->flags & FLAG_HAS_AMT)
- e1000e_get_hw_control(adapter);
-
- e1000e_power_up_phy(adapter);
-
- adapter->hw.phy.autoneg_wait_to_complete = 1;
- e1000e_reset(adapter);
- adapter->hw.phy.autoneg_wait_to_complete = 0;
- }
-
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
- /* Offline tests */
-
- /* save speed, duplex, autoneg settings */
- autoneg_advertised = adapter->hw.phy.autoneg_advertised;
- forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
- autoneg = adapter->hw.mac.autoneg;
-
- e_info("offline testing starting\n");
-
- if (if_running)
- /* indicate we're in test mode */
- e1000e_close(netdev);
-
- if (e1000_reg_test(adapter, &data[0]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- e1000e_reset(adapter);
- if (e1000_eeprom_test(adapter, &data[1]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- e1000e_reset(adapter);
- if (e1000_intr_test(adapter, &data[2]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- e1000e_reset(adapter);
- if (e1000_loopback_test(adapter, &data[3]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- /* force this routine to wait until autoneg complete/timeout */
- adapter->hw.phy.autoneg_wait_to_complete = 1;
- e1000e_reset(adapter);
- adapter->hw.phy.autoneg_wait_to_complete = 0;
-
- if (e1000_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- /* restore speed, duplex, autoneg settings */
- adapter->hw.phy.autoneg_advertised = autoneg_advertised;
- adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
- adapter->hw.mac.autoneg = autoneg;
- e1000e_reset(adapter);
-
- clear_bit(__E1000_TESTING, &adapter->state);
- if (if_running)
- e1000e_open(netdev);
+ // LSC, RXO, MDAC, SRPD, ACK, MNG
+ ew32(ICS, E1000_ICR_RXO | E1000_ICR_OTHER);
} else {
- /* Online tests */
-
- e_info("online testing starting\n");
-
- /* register, eeprom, intr and loopback tests not run online */
- data[0] = 0;
- data[1] = 0;
- data[2] = 0;
- data[3] = 0;
-
- if (e1000_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- clear_bit(__E1000_TESTING, &adapter->state);
- }
-
- if (!if_running) {
- e1000e_reset(adapter);
-
- if (adapter->flags & FLAG_HAS_AMT)
- e1000e_release_hw_control(adapter);
+ ew32(ICS, E1000_ICR_LSC | E1000_ICR_OTHER);
}
- msleep_interruptible(4 * 1000);
+ clear_bit(__E1000_TESTING, &adapter->state);
pm_runtime_put_sync(netdev->dev.parent);
}
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fffc1f0e3895..5b3a0feaf052 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -46,6 +46,10 @@
#include "e1000.h"
+DEFINE_RATELIMIT_STATE(rx_ratelimit_state, 2 * HZ, 1);
+DEFINE_RATELIMIT_STATE(other_ratelimit_state, 2 * HZ, 1);
+DEFINE_RATELIMIT_STATE(other_ratelimit_state2, 2 * HZ, 1);
+
#define DRV_EXTRAVERSION "-k"
#define DRV_VERSION "3.2.6" DRV_EXTRAVERSION
@@ -936,6 +940,9 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
int cleaned_count = 0;
bool cleaned = false;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ static unsigned int count;
+
+ mdelay(10);
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
@@ -1067,6 +1074,16 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
+
+ count++;
+ if (__ratelimit(&rx_ratelimit_state)) {
+ static unsigned int max;
+ max = max(max, total_rx_packets);
+ trace_printk("rx %u now, max %u, %u rounds\n",
+ total_rx_packets, max, count);
+ count = 0;
+ }
+
return cleaned;
}
@@ -1914,14 +1931,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
- u32 icr = er32(ICR);
+ static unsigned int count;
+ u32 icr2, icr = er32(ICR);
/* Certain events (such as RXO) which trigger Other do not set
* INT_ASSERTED. In that case, read to clear of icr does not take
* place.
*/
+ /*
if (!(icr & E1000_ICR_INT_ASSERTED))
ew32(ICR, E1000_ICR_OTHER);
+ */
+
+ icr2 = er32(ICR);
+
+ count++;
+ if (__ratelimit(&other_ratelimit_state)) {
+ trace_printk("icr 0x%08x icr2 0x%08x count %u\n", icr, icr2,
+ count);
+ count = 0;
+ }
+ if (icr & E1000_ICR_RXO && icr & E1000_ICR_INT_ASSERTED &&
+ __ratelimit(&other_ratelimit_state2)) {
+ trace_printk("special icr 0x%08x icr2 0x%08x\n", icr, icr2);
+ }
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));