Re: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040

From: Ming Lei
Date: Fri Jun 14 2013 - 13:02:34 EST


On Fri, Jun 14, 2013 at 10:30 PM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote:
> [+cc Ming, Hayes, Francois, r8169 list]
>
> On Fri, Jun 14, 2013 at 6:49 AM, nirinA raseliarison
> <nirina.raseliarison@xxxxxxxxx> wrote:
>> hello there,
>> i have this ethernet controller:
>>
>> Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast Ethernet
>> controller (rev 05)
>>
>> that uses the r8169 module.
>> it works fine, but sometimes after a reboot and issuing:
>>
>> ifconfig eth0 192.168.1.1 up
>>
>> i got the message below. after another reboot the
>> message disappears. i also get the same message with 3.9.5 and 3.9.4.
>>
>> it seems i catch my first oops and don't know what to do with it.
>> currently running:
>>
>> cat /proc/version
>> Linux version 3.9.6.20130614 (root@supernova) (gcc version 4.8.1 (GCC) ) #1
>> SMP Fri Jun 14 09:14:50 EAT 2013
>>
>> uname -a
>> Linux supernova 3.9.6.20130614 #1 SMP Fri Jun 14 09:14:50 EAT 2013 x86_64
>> Intel(R) Celeron(R) CPU G1610 @ 2.60GHz GenuineIntel GNU/Linux
>>
>> thanks,
>> -----------------8<------------------------------8<---------------------------------------
>>
>> [ 57.877560] BUG: unable to handle kernel NULL pointer dereference at
>> 0000000000000040
>> [ 57.877603] IP: [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>> [ 57.877634] PGD 21330a067 PUD 211a3a067 PMD 0
>> [ 57.877660] Oops: 0002 [#1] SMP
>> [ 57.877681] Modules linked in: fuse coretemp kvm_intel kvm evdev r8169
>> microcode mii
>> [ 57.877735] CPU 0
>> [ 57.877746] Pid: 1950, comm: firmware Not tainted 3.9.6.20130614 #1 To be
>> filled by O.E.M. To be filled by O.E.M./ONDA H61V Ver:4.01
>> [ 57.877790] RIP: 0010:[<ffffffff81491844>] [<ffffffff81491844>]
>> fw_load_abort.isra.5+0x4/0x20
>> [ 57.877824] RSP: 0018:ffff8802119a7e80 EFLAGS: 00010246
>> [ 57.877844] RAX: ffff8802158fe250 RBX: ffff880211a03b40 RCX:
>> 0000000000000000
>> [ 57.877869] RDX: ffffffff81c742c8 RSI: ffff8802158fe250 RDI:
>> 0000000000000000
>> [ 57.877895] RBP: ffff8802119a7e80 R08: ffff8802119a6000 R09:
>> 00000000000005aa
>> [ 57.877920] R10: 0000000000000000 R11: 0000000000000000 R12:
>> ffffffffffffffff
>> [ 57.877945] R13: ffff880213d34088 R14: 0000000000000003 R15:
>> ffff88020eafc230
>> [ 57.877970] FS: 00007f3c6cb2a740(0000) GS:ffff88021f200000(0000)
>> knlGS:0000000000000000
>> [ 57.877998] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [ 57.878019] CR2: 0000000000000040 CR3: 0000000203155000 CR4:
>> 00000000001407f0
>> [ 57.878044] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>> 0000000000000000
>> [ 57.878069] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
>> 0000000000000400
>> [ 57.878094] Process firmware (pid: 1950, threadinfo ffff8802119a6000,
>> task ffff8802158fe250)
>> [ 57.878124] Stack:
>> [ 57.878133] ffff8802119a7eb0 ffffffff81491917 ffff880211a4d5a0
>> 0000000000000003
>> [ 57.878168] ffff8802119a7f50 ffffffff818765a0 ffff8802119a7ec0
>> ffffffff81483063
>> [ 57.878203] ffff8802119a7f08 ffffffff8119bc9e ffff880213d34098
>> ffff880211a4d5c0
>> [ 57.878237] Call Trace:
>> [ 57.878251] [<ffffffff81491917>] firmware_loading_store+0x77/0x150
>> [ 57.878275] [<ffffffff81483063>] dev_attr_store+0x13/0x20
>> [ 57.878297] [<ffffffff8119bc9e>] sysfs_write_file+0xce/0x140
>> [ 57.878320] [<ffffffff81133e8a>] vfs_write+0x9a/0x160
>> [ 57.878340] [<ffffffff81134164>] sys_write+0x44/0x90
>> [ 57.878360] [<ffffffff817d70ed>] system_call_fastpath+0x1a/0x1f
>> [ 57.879379] Code: 6b ff ff ff 48 89 df 31 db e8 b9 b0 c9 ff e9 79 ff ff
>> ff 0f 1f 40 00 48 83 c4 10 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 00 55 48 89 e5
>> <f0> 80 4f 40 04 48 83 c7 18 e8 8e a9 bd ff 5d c3 66 66 66 2e 0f
>> [ 57.881753] RIP [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>> [ 57.882888] RSP <ffff8802119a7e80>
>> [ 57.884019] CR2: 0000000000000040
>> [ 57.885166] ---[ end trace 6705f6d4ce6b6a12 ]---

It looks like a double-abort race; could you try the patch below?
(It is also attached, for easier applying.)

--
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 6ede229..a217ba8 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -550,7 +550,12 @@ static ssize_t firmware_loading_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct firmware_priv *fw_priv = to_firmware_priv(dev);
- int loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
+ int loading = 0;
+
+ mutex_lock(&fw_lock);
+ if (fw_priv->buf)
+ loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
+ mutex_unlock(&fw_lock);

return sprintf(buf, "%d\n", loading);
}
@@ -592,12 +597,12 @@ static ssize_t firmware_loading_store(struct device *dev,
const char *buf, size_t count)
{
struct firmware_priv *fw_priv = to_firmware_priv(dev);
- struct firmware_buf *fw_buf = fw_priv->buf;
+ struct firmware_buf *fw_buf;
int loading = simple_strtol(buf, NULL, 10);
int i;

mutex_lock(&fw_lock);
-
+ fw_buf = fw_priv->buf;
if (!fw_buf)
goto out;

@@ -636,6 +641,7 @@ static ssize_t firmware_loading_store(struct device *dev,
/* fallthrough */
case -1:
fw_load_abort(fw_buf);
+ fw_priv->buf = NULL;
break;
}
out:
@@ -704,6 +710,7 @@ static int fw_realloc_buffer(struct firmware_priv
*fw_priv, int min_size)
GFP_KERNEL);
if (!new_pages) {
fw_load_abort(buf);
+ fw_priv->buf = NULL;
return -ENOMEM;
}
memcpy(new_pages, buf->pages,
@@ -721,6 +728,7 @@ static int fw_realloc_buffer(struct firmware_priv
*fw_priv, int min_size)

if (!buf->pages[buf->nr_pages]) {
fw_load_abort(buf);
+ fw_priv->buf = NULL;
return -ENOMEM;
}
buf->nr_pages++;
@@ -805,6 +813,7 @@ static void firmware_class_timeout_work(struct
work_struct *work)
return;
}
fw_load_abort(fw_priv->buf);
+ fw_priv->buf = NULL;
mutex_unlock(&fw_lock);
}

@@ -886,8 +895,6 @@ static int _request_firmware_load(struct
firmware_priv *fw_priv, bool uevent,

cancel_delayed_work_sync(&fw_priv->timeout_work);

- fw_priv->buf = NULL;
-
device_remove_file(f_dev, &dev_attr_loading);
err_del_bin_attr:
device_remove_bin_file(f_dev, &firmware_attr_data);


Thanks,
--
Ming Lei

Attachment: fw-double-abort.patch
Description: Binary data