Re: BNX2: Kernel crashes with 2.6.31 and 2.6.31.9

From: Bruno PrÃmont
Date: Fri Feb 19 2010 - 03:10:45 EST


Hi Benjamin,

On Tue, 29 Dec 2009 21:08:11 "Benjamin Li" wrote:
> Could you try running with the attached patch? This debug patch is
> built against the linux-2.6.31.9 kernel. I think the panic is
> occuring right before a reset has occured due to a TX timeout. To
> see if this is happening, this patch will print hardware state
> information when a TX timeout occurs. If you could run with this
> patch and send the logs when the panic occurs, I would really
> appreciate it.
>
> Thanks again.
>
> -Ben

Sorry for replying only this late but I've been too busy with other
things.

Anyhow, I've been doing some more testing yesterday and today and now
am able to reproduce the/a crash pretty easily.

Either running netconsole and doing 'echo t > /proc/sysrq-trigger' via
SSH on otherwise idle server (from local console nothing bad happens),
but then I have no means to communicate with the kernel (I guess it's
deadlocked somewhere in printk code)

The slightly less easy way to trigger it is with a dummy module that
kind of simulates netconsole behavior but with dummy data (see
attached). I have to have some more traffic (TCP?) going on for the bug
to trigger and tell my module multiple times to push data. This way
server is still accessible via VGA or serial console.

Attached are my 'netbomb.c' (which is a modified netconsole.c) and
full kernel log. This time running a 2.6.33-rc8-git3 kernel, having
forward-ported your patch above (e.g. half of it was already present)


I this time I got the following trace:
[ 134.643292] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 134.643304] IP: [<ffffffffa003edc2>] bnx2_poll_work+0x32/0x13d0 [bnx2]
[ 134.643314] PGD 2a972a067 PUD 2aa245067 PMD 0
[ 134.643319] Oops: 0000 [#1] SMP
[ 134.643323] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:01:04.6/class
[ 134.643328] CPU 4
[ 134.643334] Pid: 3226, comm: cat Not tainted 2.6.33-rc8-git3-x86_64 #3 /ProLiant DL360 G5
[ 134.643339] RIP: 0010:[<ffffffffa003edc2>] [<ffffffffa003edc2>] bnx2_poll_work+0x32/0x13d0 [bnx2]
[ 134.643347] RSP: 0018:ffff8802a9643b38 EFLAGS: 00010092
[ 134.643351] RAX: 0000000000000000 RBX: ffff8802afab57c0 RCX: 0000000000000010
[ 134.643355] RDX: 0000000000000000 RSI: ffff8802afab57c0 RDI: ffff8802afab4580
[ 134.643359] RBP: ffff8802a9643cd8 R08: ffff8802af051000 R09: 0000000000000007
[ 134.643363] R10: 000000000000000e R11: 0000000000000000 R12: 0000000000000000
[ 134.643367] R13: 0000000000000010 R14: 0000000000000000 R15: ffff8802afab4580
[ 134.643371] FS: 0000000000000000(0000) GS:ffff880028300000(0063) knlGS:00000000f765f6c0
[ 134.643376] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b
[ 134.643380] CR2: 0000000000000000 CR3: 00000002a9606000 CR4: 00000000000006e0
[ 134.643384] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 134.643388] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 134.643392] Process cat (pid: 3226, threadinfo ffff8802a9642000, task ffff8802aa14bff0)
[ 134.643396] Stack:
[ 134.643398] 0000000000000070 0000000000000002 0000000000000010 ffff8802afab57c0
[ 134.643404] <0> ffff8802afab4580 0000000300000002 0000000000000000 0000000100000002
[ 134.643410] <0> 0000000000000000 0000000200025220 ffffffff81862e80 0000000000000001
[ 134.643418] Call Trace:
[ 134.643427] [<ffffffff8107e3fe>] ? __alloc_pages_nodemask+0xfe/0x660
[ 134.643433] [<ffffffff811b6de6>] ? msi_set_mask_bit+0x26/0xc0
[ 134.643438] [<ffffffff811b6e8b>] ? unmask_msi_irq+0xb/0x10
[ 134.643443] [<ffffffff8106db54>] ? default_enable+0x24/0x40
[ 134.643448] [<ffffffff8106d9b6>] ? check_irq_resend+0x26/0x70
[ 134.643453] [<ffffffff8106cc23>] ? __enable_irq+0x73/0x80
[ 134.643459] [<ffffffffa004019e>] bnx2_poll_msix+0x3e/0xd0 [bnx2]
[ 134.643465] [<ffffffff8135bcd1>] netpoll_poll+0xe1/0x3c0
[ 134.643470] [<ffffffff8135c168>] netpoll_send_skb+0x118/0x210
[ 134.643475] [<ffffffff8135c45b>] netpoll_send_udp+0x1fb/0x210
[ 134.643480] [<ffffffffa00981c5>] write_msg+0x95/0xd0 [netbomb]
[ 134.643485] [<ffffffffa0098255>] netbomb_write+0x55/0xa4 [netbomb]
[ 134.643492] [<ffffffff810f6581>] proc_reg_write+0x71/0xb0
[ 134.643498] [<ffffffff810ab6cb>] vfs_write+0xcb/0x180
[ 134.643503] [<ffffffff810ab870>] sys_write+0x50/0x90
[ 134.643509] [<ffffffff8102a1a4>] sysenter_dispatch+0x7/0x2b
[ 134.643513] Code: 56 41 55 41 54 53 48 81 ec 78 01 00 00 48 89 bd 80 fe ff ff 48 89 b5 78 fe ff ff 89 95 74 fe ff ff 89 8d 70 fe ff ff 48 8b 46 70 <0f> b7 10 31 c0 80 fa ff 0f 94 c0 01 c2 66 39 96 12 02 00 00 0f
[ 134.643551] RIP [<ffffffffa003edc2>] bnx2_poll_work+0x32/0x13d0 [bnx2]
[ 134.643557] RSP <ffff8802a9643b38>
[ 134.643559] CR2: 0000000000000000
[ 134.643563] ---[ end trace 48bdec67d6d7aadb ]---

Running objdump on kernel compile with debugging symbols this matches:
000000000000ad90 <bnx2_poll_work>:
}
}

static int bnx2_poll_work(struct bnx2 *bp, struct bnx2_napi *bnapi,
int work_done, int budget)
{
ad90: 55 push %rbp
ad91: 48 89 e5 mov %rsp,%rbp
ad94: 41 57 push %r15
ad96: 41 56 push %r14
ad98: 41 55 push %r13
ad9a: 41 54 push %r12
ad9c: 53 push %rbx
ad9d: 48 81 ec 78 01 00 00 sub $0x178,%rsp
ada4: 48 89 bd 80 fe ff ff mov %rdi,-0x180(%rbp)
adab: 48 89 b5 78 fe ff ff mov %rsi,-0x188(%rbp)
adb2: 89 95 74 fe ff ff mov %edx,-0x18c(%rbp)
adb8: 89 8d 70 fe ff ff mov %ecx,-0x190(%rbp)
{
u16 cons;

/* Tell compiler that status block fields can change. */
barrier();
cons = *bnapi->hw_tx_cons_ptr;
adbe: 48 8b 46 70 mov 0x70(%rsi),%rax
adc2: 0f b7 10 movzwl (%rax),%edx
barrier();
if (unlikely((cons & MAX_TX_DESC_CNT) == MAX_TX_DESC_CNT))
cons++;
adc5: 31 c0 xor %eax,%eax
adc7: 80 fa ff cmp $0xff,%dl
adca: 0f 94 c0 sete %al
adcd: 01 c2 add %eax,%edx
int work_done, int budget)
{
struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;

if (bnx2_get_hw_tx_cons(bnapi) != txr->hw_tx_cons)
adcf: 66 39 96 12 02 00 00 cmp %dx,0x212(%rsi)
add6: 0f 84 4f 03 00 00 je b12b <bnx2_poll_work+0x39b>


So as already determined bnapi->hw_tx_cons_ptr is NULL... but nothing is
happening after that on network side.

Regards,
Bruno

Attachment: bnx2.dmesg
Description: Binary data

/*
* linux/drivers/net/netbomb.c
*
* Based on linux/drivers/net/netconsole.c,
* adapted by Bruno Prémont <bonbons@xxxxxxxxxxxxxxxxx>
*/

/****************************************************************
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
****************************************************************/

#include <linux/mm.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/netpoll.h>
#include <linux/inet.h>
#include <linux/seq_file.h>
#include <linux/configfs.h>

MODULE_AUTHOR("Maintainer: Bruno Premont <bruno.premont@xxxxxxxxxx>");
MODULE_DESCRIPTION("Network traffic generator");
MODULE_LICENSE("GPL");

#define MAX_PARAM_LENGTH 256
#define MAX_PRINT_CHUNK 1000

static char config[MAX_PARAM_LENGTH];
module_param_string(netbomb, config, MAX_PARAM_LENGTH, 0);
MODULE_PARM_DESC(netbomb, " netbomb=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");

#ifndef MODULE
static int __init option_setup(char *opt)
{
strlcpy(config, opt, MAX_PARAM_LENGTH);
return 1;
}
__setup("netbomb=", option_setup);
#endif /* MODULE */

/* Linked list of all configured targets */
static LIST_HEAD(target_list);

/* This needs to be a spinlock because write_msg() cannot sleep */
static DEFINE_SPINLOCK(target_list_lock);

/**
* struct netbomb_target - Represents a configured netbomb target.
* @list: Links this target into the target_list.
* @item: Links us into the configfs subsystem hierarchy.
* @enabled: On / off knob to enable / disable target.
* Visible from userspace (read-write).
* We maintain a strict 1:1 correspondence between this and
* whether the corresponding netpoll is active or inactive.
* Also, other parameters of a target may be modified at
* runtime only when it is disabled (enabled == 0).
* @np: The netpoll structure for this target.
* Contains the other userspace visible parameters:
* dev_name (read-write)
* local_port (read-write)
* remote_port (read-write)
* local_ip (read-write)
* remote_ip (read-write)
* local_mac (read-only)
* remote_mac (read-write)
*/
struct netbomb_target {
struct list_head list;
int enabled;
struct netpoll np;
};

/*
* No danger of targets going away from under us when dynamic
* reconfigurability is off.
*/
static void netbomb_target_get(struct netbomb_target *nt)
{
}

static void netbomb_target_put(struct netbomb_target *nt)
{
}

/* Allocate new target (from boot/module param) and setup netpoll for it */
static struct netbomb_target *alloc_param_target(char *target_config)
{
int err = -ENOMEM;
struct netbomb_target *nt;

/*
* Allocate and initialize with defaults.
* Note that these targets get their config_item fields zeroed-out.
*/
nt = kzalloc(sizeof(*nt), GFP_KERNEL);
if (!nt) {
printk(KERN_ERR "netbomb: failed to allocate memory\n");
goto fail;
}

nt->np.name = "netbomb";
strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ);
nt->np.local_port = 6665;
nt->np.remote_port = 6666;
memset(nt->np.remote_mac, 0xff, ETH_ALEN);

/* Parse parameters and setup netpoll */
err = netpoll_parse_options(&nt->np, target_config);
if (err)
goto fail;

err = netpoll_setup(&nt->np);
if (err)
goto fail;

nt->enabled = 1;

return nt;

fail:
kfree(nt);
return ERR_PTR(err);
}

/* Cleanup netpoll for given target (from boot/module param) and free it */
static void free_param_target(struct netbomb_target *nt)
{
netpoll_cleanup(&nt->np);
kfree(nt);
}

/* Handle network interface device notifications */
static int netbomb_netdev_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
unsigned long flags;
struct netbomb_target *nt;
struct net_device *dev = ptr;

if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
goto done;

spin_lock_irqsave(&target_list_lock, flags);
list_for_each_entry(nt, &target_list, list) {
netbomb_target_get(nt);
if (nt->np.dev == dev) {
switch (event) {
case NETDEV_CHANGENAME:
strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
break;
case NETDEV_UNREGISTER:
if (!nt->enabled)
break;
netpoll_cleanup(&nt->np);
nt->enabled = 0;
printk(KERN_INFO "netbomb: network logging stopped"
", interface %s unregistered\n",
dev->name);
break;
}
}
netbomb_target_put(nt);
}
spin_unlock_irqrestore(&target_list_lock, flags);

done:
return NOTIFY_DONE;
}

static struct notifier_block netbomb_netdev_notifier = {
.notifier_call = netbomb_netdev_event,
};

static void write_msg(struct console *con, const char *msg, unsigned int len)
{
int frag, left;
unsigned long flags;
struct netbomb_target *nt;
const char *tmp;

/* Avoid taking lock and disabling interrupts unnecessarily */
if (list_empty(&target_list))
return;

spin_lock_irqsave(&target_list_lock, flags);
list_for_each_entry(nt, &target_list, list) {
netbomb_target_get(nt);
if (nt->enabled && netif_running(nt->np.dev)) {
/*
* We nest this inside the for-each-target loop above
* so that we're able to get as much logging out to
* at least one target if we die inside here, instead
* of unnecessarily keeping all targets in lock-step.
*/
tmp = msg;
for (left = len; left;) {
frag = min(left, MAX_PRINT_CHUNK);
netpoll_send_udp(&nt->np, tmp, frag);
tmp += frag;
left -= frag;
}
}
netbomb_target_put(nt);
}
spin_unlock_irqrestore(&target_list_lock, flags);
}

static struct console netbomb = {
.name = "netcon",
.flags = CON_ENABLED,
.write = write_msg,
};

static int netbomb_show(struct seq_file *m, void *v)
{
seq_printf(m, "Array of bytes indicating packet lengths\n");
return 0;
}

static int netbomb_open(struct inode *inode, struct file *file)
{
return single_open(file, netbomb_show, NULL);
}

static const char * bufs[256];

static ssize_t netbomb_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
int i;

for (i = 0; i < count; i++) {
unsigned char c;
const char *buff;

if (copy_from_user(&c, buffer+i, 1))
return -EFAULT;

if ((buff = bufs[c]) != NULL)
write_msg(NULL, buff, strlen(buff));
}
return count;
}

static const struct file_operations netbomb_fops = {
.owner = THIS_MODULE,
.open = netbomb_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = netbomb_write,
};

static int __init init_netbomb(void)
{
int err, i;
struct netbomb_target *nt, *tmp;
unsigned long flags;
char *target_config;
char *input = config;

if (strnlen(input, MAX_PARAM_LENGTH)) {
while ((target_config = strsep(&input, ";"))) {
nt = alloc_param_target(target_config);
if (IS_ERR(nt)) {
err = PTR_ERR(nt);
goto fail;
}
/* Dump existing printks when we register */
netbomb.flags |= CON_PRINTBUFFER;

spin_lock_irqsave(&target_list_lock, flags);
list_add(&nt->list, &target_list);
spin_unlock_irqrestore(&target_list_lock, flags);
}
}

for (i = 0; i < 256; i++) {
char *p = kmalloc(i > 0 ? i+1 : 2, GFP_KERNEL);
bufs[i] = p;
if (p == NULL)
continue;
if (i == 0) {
p[0] = '\n';
p[1] = '\0';
} else {
memset(p, '_', i > 0 ? i+1 : 2);
if (i < 10)
sprintf(p + i - 1, "%d", i);
else if (i < 100)
sprintf(p + i - 2, "%d", i);
else
sprintf(p + i - 3, "%d", i);
p[i] = '\0';
}
}

err = register_netdevice_notifier(&netbomb_netdev_notifier);
if (err)
goto fail;
proc_create("netbomb-trigger", 0, NULL, &netbomb_fops);

printk(KERN_INFO "netbomb: network logging started\n");

return err;

undonotifier:
unregister_netdevice_notifier(&netbomb_netdev_notifier);

fail:
printk(KERN_ERR "netbomb: cleaning up\n");

/*
* Remove all targets and destroy them (only targets created
* from the boot/module option exist here). Skipping the list
* lock is safe here, and netpoll_cleanup() will sleep.
*/
list_for_each_entry_safe(nt, tmp, &target_list, list) {
list_del(&nt->list);
free_param_target(nt);
}

return err;
}

static void __exit cleanup_netbomb(void)
{
struct netbomb_target *nt, *tmp;
int i;

remove_proc_entry("netbomb-trigger", NULL);
unregister_netdevice_notifier(&netbomb_netdev_notifier);

/*
* Targets created via configfs pin references on our module
* and would first be rmdir(2)'ed from userspace. We reach
* here only when they are already destroyed, and only those
* created from the boot/module option are left, so remove and
* destroy them. Skipping the list lock is safe here, and
* netpoll_cleanup() will sleep.
*/
list_for_each_entry_safe(nt, tmp, &target_list, list) {
list_del(&nt->list);
free_param_target(nt);
}

for (i = 0; i < 256; i++)
kfree(bufs[i]);
}

module_init(init_netbomb);
module_exit(cleanup_netbomb);