e1000 6.0.60 problems

From: Dror Cohen
Date: Mon Aug 15 2005 - 08:15:27 EST


Hello all,
we've (XIV ltd.) encountered a problem with the e1000 driver. the basic
problem was that we'd see every few hours a NETDEV message notifying us
that the NIC has been reset. Our enviroment is a cluster of linux
based (2.4.27) dual xeon on super micro board with the e1000 onboard
(82546GB). There is a very high load of traffic on the e1000 (mainly due
to iSCSI operations, ssh to the machines and sometimes we also use iperf
to really exhaust the network bandwidth). This happend with version 4 of the
driver. We've upgraded to the version 6.0.54 and turned on NAPI (in
hope that this will resolve the problem). here's our modules.conf
file:

"options e1000
RxDescriptors=1024,1024,1024,1024,1024,1024,1024,1024,1024,1024
TxDescriptors=1024,1024,1024,1024,1024,1024,1024,1024,1024,1024
InterruptThrottleRate=2000,2000,2000,2000,2000,2000,2000,2000,2000,2000"

This made the NETDEV messages disaapear but the machines started getting
stuck totally (no console, no kdb). the var log messages would show
allocation problems and after enabling some debug info we'd see
something of this sort:

"Jul 29 12:40:06 mnfc-150-101 kernel: __alloc_pages: 2-order allocation
failed (gfp=0x20/0)
Jul 29 12:40:06 mnfc-150-101 kernel: b1cb1be0 b1cb1c2c 8013aee1 8034b8a0
00000002 00000020 00000000 00001180
Jul 29 12:40:06 mnfc-150-101 kernel: 00000004 00000000 0000000c
80446abc 80446c74 00000002 00000020 000000
02
Jul 29 12:40:06 mnfc-150-101 kernel: 00000020 81b0f9b8 00000000
b1cb1c34 8013aa66 b1cb1c3c 8013af45 b1cb1c
6c
Jul 29 12:40:06 mnfc-150-101 kernel: Call Trace:
Jul 29 12:40:06 mnfc-150-101 kernel: [<8013aee1>]
__alloc_pages+0x271/0x290 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8013aa66>]
_alloc_pages+0x16/0x20 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8013af45>]
__get_free_pages+0x45/0x50 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<80137ce6>]
kmem_cache_grow+0xc6/0x280 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<80138239>] kmalloc+0xa9/0x170
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802c8d04>] alloc_skb+0xc4/0x1e0
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<c080e4ff>]
e1000_alloc_rx_buffers+0x5b/0x358 [e1000]
Jul 29 12:40:06 mnfc-150-101 kernel: [<c080de64>]
e1000_clean_rx_irq+0x1f0/0x44c [e1000]
Jul 29 12:40:06 mnfc-150-101 kernel: [<c080d7dc>] e1000_clean+0x44/0xe4
[e1000]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802cda50>]
net_rx_action+0xa0/0x140 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<80122609>] do_softirq+0xe9/0xf0
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8010b366>] do_IRQ+0xf6/0x100
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8010dcb8>] call_do_IRQ+0x5/0xd
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8032532c>]
__generic_copy_to_user+0x3c/0x60 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802ca91c>]
memcpy_toiovec+0x5c/0x70 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802cafb8>]
skb_copy_datagram_iovec+0x48/0x200 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802e9ee3>]
tcp_recvmsg+0x753/0x990 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<80153414>]
poll_freewait+0x44/0x50 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<803098da>]
inet_recvmsg+0x4a/0x60 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802c548e>]
sock_recvmsg+0x3e/0xe0 [kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<802c55cc>] sock_read+0x9c/0xb0
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<8014216c>] sys_read+0x9c/0x150
[kernel]
Jul 29 12:40:06 mnfc-150-101 kernel: [<80109163>] system_call+0x33/0x38
[kernel]"

after upgrading to 6.0.60 there was no change. comparing the version
with the one coming with the 2.6 kernel proved that one major change
wasn't backported to the 6.0.60 (the watch dog change). We've ported
this back to the 6.0.60 (attached is the diff between the 6.0.60 from
the site to the one with our backporting). now we're seeing back the
NETDEV (less frequent and the whole reset process is little faster, but
still very annoying) along with the "e1000_clean_tx_irq: Detected Tx
Unit Hang" messages. The allocations problems (?) also seem to exists as
well. here's what it looks like:

"Aug 13 06:03:39 14.10.140.6 kernel: e1000: eth1: e1000_clean_tx_irq:
Detected Tx Unit Hang
Aug 13 06:03:39 14.10.140.6 kernel: TDH <242>
Aug 13 06:03:39 14.10.140.6 kernel: TDT <1d>
Aug 13 06:03:39 14.10.140.6 kernel: next_to_use <1d>
Aug 13 06:03:39 14.10.140.6 kernel: next_to_clean <242>
Aug 13 06:03:39 14.10.140.6 kernel: buffer_info[next_to_clean]
Aug 13 06:03:39 14.10.140.6 kernel: dma <6ea1c06a>
Aug 13 06:03:39 14.10.140.6 kernel: time_stamp <160b24f>
Aug 13 06:03:39 14.10.140.6 kernel: next_to_watch <245>
Aug 13 06:03:39 14.10.140.6 kernel: jiffies <160b310>
Aug 13 06:03:39 14.10.140.6 kernel: next_to_watch.status <0>
Aug 13 06:03:41 14.10.140.6 kernel: e1000: eth1: e1000_clean_tx_irq:
Detected Tx Unit Hang
Aug 13 06:03:41 14.10.140.6 kernel: TDH <242>
Aug 13 06:03:41 14.10.140.6 kernel: TDT <1d>
Aug 13 06:03:41 14.10.140.6 kernel: next_to_use <1d>
Aug 13 06:03:41 14.10.140.6 kernel: next_to_clean <242>
Aug 13 06:03:41 14.10.140.6 kernel: buffer_info[next_to_clean]
Aug 13 06:03:41 14.10.140.6 kernel: dma <6ea1c06a>
Aug 13 06:03:41 14.10.140.6 kernel: time_stamp <160b24f>
Aug 13 06:03:41 14.10.140.6 kernel: next_to_watch <245>
Aug 13 06:03:41 14.10.140.6 kernel: jiffies <160b3d8>
Aug 13 06:03:41 14.10.140.6 kernel: next_to_watch.status <0>
Aug 13 06:03:43 14.10.140.6 kernel: e1000: eth1: e1000_clean_tx_irq:
Detected Tx Unit Hang
Aug 13 06:03:43 14.10.140.6 kernel: TDH <242>
Aug 13 06:03:43 14.10.140.6 kernel: TDT <1d>
Aug 13 06:03:43 14.10.140.6 kernel: next_to_use <1d>
Aug 13 06:03:43 14.10.140.6 kernel: next_to_clean <242>
Aug 13 06:03:43 14.10.140.6 kernel: buffer_info[next_to_clean]
Aug 13 06:03:43 14.10.140.6 kernel: dma <6ea1c06a>
Aug 13 06:03:43 14.10.140.6 kernel: time_stamp <160b24f>
Aug 13 06:03:43 14.10.140.6 kernel: next_to_watch <245>
Aug 13 06:03:43 14.10.140.6 kernel: jiffies <160b4a0>
Aug 13 06:03:43 14.10.140.6 kernel: next_to_watch.status <0>
Aug 13 06:03:45 14.10.140.6 kernel: e1000: eth1: e1000_clean_tx_irq:
Detected Tx Unit Hang
Aug 13 06:03:45 14.10.140.6 kernel: TDH <242>
Aug 13 06:03:45 14.10.140.6 kernel: TDT <1d>
Aug 13 06:03:45 14.10.140.6 kernel: next_to_use <1d>
Aug 13 06:03:45 14.10.140.6 kernel: next_to_clean <242>
Aug 13 06:03:45 14.10.140.6 kernel: buffer_info[next_to_clean]
Aug 13 06:03:45 14.10.140.6 kernel: dma <6ea1c06a>
Aug 13 06:03:45 14.10.140.6 kernel: time_stamp <160b24f>
Aug 13 06:03:45 14.10.140.6 kernel: next_to_watch <245>
Aug 13 06:03:45 14.10.140.6 kernel: jiffies <160b568>
Aug 13 06:03:45 14.10.140.6 kernel: next_to_watch.status <0>
Aug 13 06:03:47 14.10.140.6 kernel: e1000: eth1: e1000_clean_tx_irq:
Detected Tx Unit Hang
Aug 13 06:03:47 14.10.140.6 kernel: TDH <242>
Aug 13 06:03:47 14.10.140.6 kernel: TDT <1d>
Aug 13 06:03:47 14.10.140.6 kernel: next_to_use <1d>
Aug 13 06:03:47 14.10.140.6 kernel: next_to_clean <242>
Aug 13 06:03:47 14.10.140.6 kernel: buffer_info[next_to_clean]
Aug 13 06:03:47 14.10.140.6 kernel: dma <6ea1c06a>
Aug 13 06:03:47 14.10.140.6 kernel: time_stamp <160b24f>
Aug 13 06:03:47 14.10.140.6 kernel: next_to_watch <245>
Aug 13 06:03:47 14.10.140.6 kernel: jiffies <160b630>
Aug 13 06:03:47 14.10.140.6 kernel: next_to_watch.status <0>
Aug 13 06:03:47 14.10.140.6 kernel: NETDEV WATCHDOG: eth1: transmit
timed out
Aug 13 06:03:50 14.10.140.6 kernel: e1000: eth1: e1000_watchdog_task:
NIC Link is Up 1000 Mbps Full Duplex"

when using the latest 6.1.16 version of the driver (which doesn't seem
to have the watch dog fix, but a lot of other fixes) we see similar
results to our modified version - there are still NETDEV events with
the "Detected Tx Unit Hang" message followed by the output of the
registers.

more information can be supplied if needed.

our questions:
* Is anyone experiencing similar problems (looks to us alot like: "[
955064 ] e1000 freezes Linux with 82544GC (similar to a year-old bug)") ?

* Was the watch dog change forgotten, or left out deliberetly ? Does our
patch seems to be OK ? it is also not present in the latest version (6.1.6).

* are the allocation problems related in anyway to e1000 problem ?

thanks,


--
Dror Cohen
XIV ltd.

email: dror at xiv.co.il
phone: +972-3-6074726
diff -uprN -X dontdiff src/e1000.h modified_src/e1000.h
--- src/e1000.h 2005-05-18 00:31:44.000000000 +0300
+++ modified_src/e1000.h 2005-08-15 09:09:40.792696903 +0300
@@ -232,6 +232,7 @@ struct e1000_adapter {
spinlock_t stats_lock;
atomic_t irq_sem;
struct work_struct tx_timeout_task;
+ struct work_struct watchdog_task;
uint8_t fc_autoneg;

#ifdef ETHTOOL_PHYS_ID
diff -uprN -X dontdiff src/e1000_main.c modified_src/e1000_main.c
--- src/e1000_main.c 2005-05-18 00:31:44.000000000 +0300
+++ modified_src/e1000_main.c 2005-08-15 09:09:40.883695280 +0300
@@ -176,6 +176,7 @@ static void e1000_clean_rx_ring(struct e
static void e1000_set_multi(struct net_device *netdev);
static void e1000_update_phy_info(unsigned long data);
static void e1000_watchdog(unsigned long data);
+static void e1000_watchdog_task(struct e1000_adapter *adapter);
static void e1000_82547_tx_fifo_stall(unsigned long data);
static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
@@ -715,6 +716,9 @@ e1000_probe(struct pci_dev *pdev,
adapter->watchdog_timer.function = &e1000_watchdog;
adapter->watchdog_timer.data = (unsigned long) adapter;

+ INIT_WORK(&adapter->watchdog_task,
+ (void (*)(void *))e1000_watchdog_task, adapter);
+
init_timer(&adapter->phy_info_timer);
adapter->phy_info_timer.function = &e1000_update_phy_info;
adapter->phy_info_timer.data = (unsigned long) adapter;
@@ -812,6 +816,8 @@ e1000_remove(struct pci_dev *pdev)
struct e1000_adapter *adapter = netdev_priv(netdev);
uint32_t manc, swsm;

+ flush_scheduled_work();
+
if(adapter->hw.mac_type >= e1000_82540 &&
adapter->hw.media_type == e1000_media_type_copper) {
manc = E1000_READ_REG(&adapter->hw, MANC);
@@ -1829,6 +1835,14 @@ static void
e1000_watchdog(unsigned long data)
{
struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+
+ /* Do the rest outside of interrupt context */
+ schedule_work(&adapter->watchdog_task);
+}
+
+static void
+e1000_watchdog_task(struct e1000_adapter *adapter)
+{
struct net_device *netdev = adapter->netdev;
struct e1000_desc_ring *txdr = &adapter->tx_ring;
uint32_t link;
@@ -2392,6 +2406,9 @@ e1000_xmit_frame(struct sk_buff *skb, st
tso = e1000_tso(adapter, skb);
if(tso < 0) {
dev_kfree_skb_any(skb);
+#ifdef NETIF_F_LLTX
+ spin_unlock_irqrestore(&adapter->tx_lock, flags);
+#endif
return NETDEV_TX_OK;
}