Re: [patch] revert: [NET]: Fix races in net_rx_action vs netpoll

From: Olaf Kirch
Date: Wed Jul 18 2007 - 10:44:25 EST


On Wednesday 18 July 2007 14:48, Ingo Molnar wrote:
> something i noticed: netconsole output seems to trickle through though,
> but very, very slowly (a packet once every 4 seconds or so). TCP/IP is
> not functional.
>
> also, i'm using netconsole via the command line (both the network driver
> and netconsole is built into the bzImage), maybe that makes a
> difference?

Possibly - but so far there's nothing in the code that jumped out at me.

Can you try the following patch, please? I'm still pretty much in the dark,
so I'm adding a bunch of printks. Let's hope this doesn't change the
timing in a way that makes the bug disappear.

Thanks
Olaf
--
Olaf Kirch | --- o --- Nous sommes du soleil we love when we play
okir@xxxxxx | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax
-----

---
include/linux/netdevice.h | 4 ++++
net/core/dev.c | 14 ++++++++++++++
2 files changed, 18 insertions(+)

Index: build-2.6/include/linux/netdevice.h
===================================================================
--- build-2.6.orig/include/linux/netdevice.h
+++ build-2.6/include/linux/netdevice.h
@@ -692,6 +692,7 @@ struct softnet_data
#ifdef CONFIG_NET_DMA
struct dma_chan *net_dma;
#endif
+ unsigned long scheduled;
};

DECLARE_PER_CPU(struct softnet_data,softnet_data);
@@ -1026,6 +1027,9 @@ static inline void netif_rx_complete(str
/* Prevent race with netpoll - yes, this is a kludge.
* But at least it doesn't penalize the non-netpoll
* code path. */
+ printk(KERN_DEBUG "netif_rx_complete(%s)%s\n", dev->name,
+ test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)?
+ " - frozen" : "");
if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state))
return;
#endif
Index: build-2.6/net/core/dev.c
===================================================================
--- build-2.6.orig/net/core/dev.c
+++ build-2.6/net/core/dev.c
@@ -1192,6 +1192,7 @@ void __netif_rx_schedule(struct net_devi
{
unsigned long flags;

+ printk(KERN_DEBUG "__netif_rx_schedule(%s)\n", dev->name);
local_irq_save(flags);
dev_hold(dev);
list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
@@ -1199,6 +1200,7 @@ void __netif_rx_schedule(struct net_devi
dev->quota += dev->weight;
else
dev->quota = dev->weight;
+ __get_cpu_var(softnet_data).scheduled = jiffies;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
local_irq_restore(flags);
}
@@ -2028,6 +2030,9 @@ static void net_rx_action(struct softirq

local_irq_disable();

+ /* Complain if we're scheduled way too late. */
+ WARN_ON(time_after(jiffies, __get_cpu_var(softnet_data).scheduled + HZ));
+
while (!list_empty(&queue->poll_list)) {
struct net_device *dev;

@@ -2038,9 +2043,13 @@ static void net_rx_action(struct softirq

dev = list_entry(queue->poll_list.next,
struct net_device, poll_list);
+
have = netpoll_poll_lock(dev);

+ BUG_ON(test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state));
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+ printk(KERN_DEBUG "netif_rx_action(%s) - keep on polling\n",
+ dev->name);
netpoll_poll_unlock(have);
local_irq_disable();
list_move_tail(&dev->poll_list, &queue->poll_list);
@@ -2049,6 +2058,10 @@ static void net_rx_action(struct softirq
else
dev->quota = dev->weight;
} else {
+ printk(KERN_DEBUG "netif_rx_action(%s) - done%s\n",
+ dev->name,
+ test_bit(__LINK_STATE_RX_SCHED, &dev->state)?
+ "; rx_sched set" : "");
netpoll_poll_unlock(have);
dev_put(dev);
local_irq_disable();
@@ -2074,6 +2087,7 @@ out:

softnet_break:
__get_cpu_var(netdev_rx_stat).time_squeeze++;
+ __get_cpu_var(softnet_data).scheduled = jiffies;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/